# SET DIRECTORY TO SOURCE FILE 
# NOTE(review): presumably the working directory must be this script's own folder
# before running, since every path below is relative to the directory two levels up.

# Move the working directory up two levels so 'tables/' and 'data/' paths resolve.
setwd("../../")

# Divert console output to the log file; the sink() call at the end of the script
# restores normal output.
sink("tables/table4a_log.txt") #create log 

library(tidyverse)
library(plyr)


# Effect of heuristics on MC approval

# Respondents from the other project cannot serve as a control group here: in that
# project other information was shown before the MC approval question was asked.
# The analysis therefore relies only on within-subject variation in the treatment group.
dat <- filter(
  read.csv("data/cleaned/heuristics_cleaned_wide.csv", stringsAsFactors = FALSE),
  project == "heuristics"
)

# Party indicators (1 = Republican, 0 = otherwise, NA when the input is missing)
dat$participant_republican <- ifelse(dat$pid <= 4, 0, 1)
dat$MC_republican <- ifelse(dat$reppartyoneletter != "R", 0, 1)

# TRUE when respondent and MC fall on the same side of the party indicators
dat$participant_MC_party_match <- dat$MC_republican == dat$participant_republican
#summary(lm(mcratingscale ~ participant_MC_party_match, dat)) # sanity checking

# Outcome of interest: the respondents' favorability toward their Member of Congress
#sd(dat$mcratingscale)

# One row per vote / special interest group (SIG) that could potentially be shown
votes <- read.csv(
  "data/Ancillary Data/vote_and_group_info_map.csv",
  stringsAsFactors = FALSE
)

# Preliminary step: split the groups by ideological side, so that later code can tell
# whether support from a group signals the liberal or the conservative position.
liberal.groups <- c(
  "AFL-CIO",
  "NARAL Pro-Choice America",
  "Human Rights Campaign",
  "League of Conservation Voters",
  "National Parks Conservation Association",
  "National Active and Retired Federal Employees Association"
)
# Every other group named in the vote map is treated as conservative
conservative.groups <- unique(votes$sig_name[!(votes$sig_name %in% liberal.groups)])
#liberal.groups
#conservative.groups

# Iterate over votes to create new columns in dat for each vote possibly shown.
#
# For every row of `votes` the loop adds to `dat`:
#   bill_pref_conservative<house_vote_id>  respondent's bill preference: 1 = conservative, -1 = liberal
#   sig_rating_supportive_<sig_id>         1 if the SIG rating of the MC is >= 50, 0 if below, NA if missing
#   sig_signal_conservative_<sig_id>       1 / -1: ideological direction of the signal the rating sends
#   sig_signal_matches_voter_<sig_id>      1 if that direction equals the respondent's preference, else 0
# and copies the supportive / matches values into the *_vote_shown columns for the
# respondents whose randomrating_sig_id equals this sig_id.
dat$sig_rating_supportive_vote_shown <- NA
dat$sig_signal_matches_voter_vote_shown <- NA

# seq_len() runs the loop zero times for an empty vote map (1:0 would iterate over 1 and 0)
for (i in seq_len(nrow(votes))) {
  sig_id <- votes$sig_id[i]
  sig_name <- votes$sig_name[i]
  house_vote_id <- votes$house_vote_id[i]
  stopifnot(sig_name %in% c(liberal.groups, conservative.groups))
  sig.conservative <- if (sig_name %in% conservative.groups) 1 else -1

  # Column names used for this vote / SIG, built once per iteration
  actual_vote_col <- paste0('repactualvote', house_vote_id)
  rating_col <- paste0('reprating', sig_id)
  ownview_col <- paste0('ownview', house_vote_id)
  bill_pref_col <- paste0('bill_pref_conservative', house_vote_id)
  supportive_col <- paste0('sig_rating_supportive_', sig_id)
  signal_col <- paste0('sig_signal_conservative_', sig_id)
  matches_col <- paste0('sig_signal_matches_voter_', sig_id)

  # If house vote is missing, need to set rating to NA since it would not be shown
  dat[is.na(dat[, actual_vote_col]), rating_col] <- NA

  # Does the respondent have a conservative bill preference on this bill?
  # All the bills are conservative except the prohibit use of funds bill, 56612.
  dat[, bill_pref_col] <-
    dplyr::recode(dat[, ownview_col], `0` = -1, `1` = 1) # recode 0 to -1
  if (house_vote_id == 56612) {
    dat[, bill_pref_col] <- -1 * dat[, bill_pref_col]
  }
  # print(summary(lm( dat[, bill_pref_col] ~ dat$participant_republican)))

  # Did the SIG give a positive (>= 50) or negative (< 50) rating to the MC?
  dat[, supportive_col] <- ifelse(dat[, rating_col] < 50, -1, 1)
  # print(summary(lm( dat[, supportive_col] ~ dat[, rating_col] )))

  # 1 means the SIG rating sent a conservative signal about the MC's vote,
  # -1 means it sent a liberal signal about the MC's vote
  dat[, signal_col] <- dat[, supportive_col] * sig.conservative
  # print(summary(lm(dat[, signal_col] ~ dat$MC_republican)))

  # sig_signal_matches_voter: does the ideological direction of the respondent's
  # preference on the issue match the direction of the signal sent by the endorsement?
  # The product of two +/-1 codes is 1 on a match and -1 on a mismatch.
  dat[, matches_col] <- dat[, bill_pref_col] * dat[, signal_col]
  # print(summary(lm(dat$participant_MC_party_match ~ dat[, matches_col])))

  # Change to binary (1 / 0) for easier interpretation; NA stays NA
  dat[, supportive_col] <- as.numeric(dat[, supportive_col] == 1)
  dat[, matches_col] <- as.numeric(dat[, matches_col] == 1)

  # If this was the vote that was shown, load this vote into the proper variable
  this.vote.shown <- which(dat$randomrating_sig_id == sig_id)
  dat$sig_rating_supportive_vote_shown[this.vote.shown] <-
    dat[this.vote.shown, supportive_col]
  dat$sig_signal_matches_voter_vote_shown[this.vote.shown] <-
    dat[this.vote.shown, matches_col]
}

# # Control for average values of treatment within version of survey shown (a or b)
# dat$sig_rating_supportive_avg <- NA
# dat$sig_signal_matches_voter_avg <- NA
# for(version in c('a', 'b')) {
#   sig.ids <- votes$sig_id[votes$split == version]
#   
#   # Average of whether SIG ratings were supportive
#   dat$sig_rating_supportive_avg[dat$survey_version == version] <-
#     rowMeans(dat[dat$survey_version == version, paste0('sig_rating_supportive_', sig.ids)], na.rm = TRUE)
#   
#   # Average of whether signal from SIG matches the voter
#   dat$sig_signal_matches_voter_avg[dat$survey_version == version] <-
#     rowMeans(dat[dat$survey_version == version, paste0('sig_signal_matches_voter_', sig.ids)], na.rm = TRUE)
# }


# Control for average values of treatment within version of survey shown (a or b)
dat$sig_rating_supportive_avg <- NA
dat$sig_signal_matches_voter_avg <- NA

# prob.s<s>m<m>: per respondent, the share of SIGs in their survey version for which
# sig_rating_supportive == s and sig_signal_matches_voter == m. SIGs with NA in either
# column are dropped from the average (na.rm = TRUE).
dat$prob.s0m0 <- NA
dat$prob.s0m1 <- NA
dat$prob.s1m0 <- NA
dat$prob.s1m1 <- NA

for (version in c('a', 'b')) {
  sig.ids <- votes$sig_id[votes$split == version]

  # Rows and per-SIG columns for this survey version; neither changes inside the
  # loops below, so they are extracted once.
  in.version <- dat$survey_version == version
  supportive <- dat[in.version, paste0('sig_rating_supportive_', sig.ids)]
  matches <- dat[in.version, paste0('sig_signal_matches_voter_', sig.ids)]

  # Average of each arm; rounded to 5 digits so equal probabilities group together
  for (s in c(0, 1)) {
    for (m in c(0, 1)) {
      dat[in.version, paste0('prob.s', s, 'm', m)] <-
        round(
          rowMeans(as.matrix(supportive == s) * as.matrix(matches == m), na.rm = TRUE),
          5
        )
    }
  }

  # Average of whether SIG ratings were supportive
  dat$sig_rating_supportive_avg[in.version] <- rowMeans(supportive, na.rm = TRUE)

  # Average of whether signal from SIG matches the voter
  dat$sig_signal_matches_voter_avg[in.version] <- rowMeans(matches, na.rm = TRUE)
}

# The four arm probabilities must sum to 1 for every respondent (this also fails on NA)
stopifnot(round(dat$prob.s0m0 + dat$prob.s0m1 + dat$prob.s1m0 + dat$prob.s1m1, 4) == 1)

# Fixed-effect cell id: respondents sharing the same four arm probabilities get the same
# level. Grouping first and calling dplyr:: explicitly avoids the deprecated
# group_indices(., ...) form and does not depend on which package's mutate() is attached
# (plyr is loaded after tidyverse in this script).
dat$sameprob.fes <- factor(
  dplyr::group_indices(
    dplyr::group_by(dat, prob.s0m0, prob.s0m1, prob.s1m0, prob.s1m1)
  )
)



# Determine which fixed effect cells contribute to identification in horserace regressions.
#   prob4max       largest of the four arm probabilities
#   prob.s0        probability that the shown rating is not supportive
#   prob.m0        probability that the shown signal does not match the voter
#   probs.nonzero  TRUE when both prob.s0 and prob.m0 lie strictly between 0 and 1
#   n.in.fe.cell   number of respondents sharing the same sameprob.fes cell
# dplyr:: is spelled out because plyr is attached after tidyverse in this script and
# masks mutate(); ungroup() keeps `dat` from staying grouped for the rest of the script.
dat <- dat %>%
  dplyr::mutate(
    prob4max = pmax(prob.s0m0, prob.s0m1, prob.s1m0, prob.s1m1),
    prob.s0 = prob.s0m0 + prob.s0m1,
    prob.m0 = prob.s0m0 + prob.s1m0,
    probs.nonzero = prob.s0 > 0 & prob.s0 < 1 & prob.m0 > 0 & prob.m0 < 1
  ) %>%
  dplyr::group_by(sameprob.fes) %>%
  dplyr::add_tally(name = 'n.in.fe.cell') %>%
  dplyr::ungroup() %>%
  dplyr::mutate(
    # A cell is used when it has at least two respondents and no arm is (numerically) certain
    is.used.in.horserace.identification = n.in.fe.cell >= 2 & prob4max < .9999999,
    # Strict version removes cases that only contribute to identification for one of the two coefficients
    is.used.in.horserace.identification.strict =
      is.used.in.horserace.identification & probs.nonzero
  )


# Include PID as a control for precision
# pid: more positive values = respondent is more republican.
# PID_agree keeps pid when MC_republican is 1 and reverses the scale (8 - pid) when
# MC_republican is 0 -- presumably so that higher values mean closer to the MC's party
# (assumes pid runs 1-7; TODO confirm).
# ifelse() is used instead of logical subscripted assignment so that a missing
# MC_republican yields NA for that row rather than aborting the script.
dat$PID_agree <- ifelse(dat$MC_republican == 0, 8 - dat$pid, dat$pid)



# Save the analysis data set used for the table below
write.csv(dat, 'data/cleaned/heuristics_4a.csv', row.names = FALSE)




## Regress respondents' support for their member of Congress on the heuristic shown to
## them. Every model controls for pid * MC_republican; the ".withusedinfinal" and
## ".strict" variants refit the same formula on a subset of respondents.

# --- Agreement: does the SIG signal shown match the voter? ---
# Fixed effects for sig_signal_matches_voter_avg
m_agreement_fe <- lm(
  formula = mcratingscale ~ sig_signal_matches_voter_vote_shown +
    factor(sig_signal_matches_voter_avg) + pid * MC_republican,
  data = dat
)
#summary(m_agreement_fe)

m_agreement_fe.withusedinfinal <- lm(
  formula = mcratingscale ~ sig_signal_matches_voter_vote_shown +
    factor(sig_signal_matches_voter_avg) + pid * MC_republican,
  data = dat,
  subset = is.used.in.horserace.identification
)
#summary(m_agreement_fe.withusedinfinal)

# --- Naive: is the rating shown supportive at all? ---
# An alternate explanation is that, since interest groups are named so confusingly,
# a positive endorsement from ANY group will increase approval.
# This needs a control for the average heuristic ratings:
# fixed effects for sig_rating_supportive_avg
m_naive_fe <- lm(
  formula = mcratingscale ~ sig_rating_supportive_vote_shown +
    as.factor(sig_rating_supportive_avg) + pid * MC_republican,
  data = dat
)
#summary(m_naive_fe)

m_naive_fe.withusedinfinal <- lm(
  formula = mcratingscale ~ sig_rating_supportive_vote_shown +
    as.factor(sig_rating_supportive_avg) + pid * MC_republican,
  data = dat,
  subset = is.used.in.horserace.identification
)
#summary(m_naive_fe.withusedinfinal)

# --- Horserace: both treatments together, with interaction of FEs (sameprob.fes) ---
m_horserace_fe <- lm(
  formula = mcratingscale ~ sig_signal_matches_voter_vote_shown +
    sig_rating_supportive_vote_shown +
    factor(sameprob.fes) +
    pid * MC_republican,
  data = dat
)
#summary(m_horserace_fe)

# Same model, only respondents in cells that contribute to identification
m_horserace_fe.withusedinfinal <- lm(
  formula = mcratingscale ~ sig_signal_matches_voter_vote_shown +
    sig_rating_supportive_vote_shown +
    factor(sameprob.fes) +
    pid * MC_republican,
  data = dat,
  subset = is.used.in.horserace.identification
)
#summary(m_horserace_fe.withusedinfinal)

# Same model, strict identification subset
m_horserace_fe.strict <- lm(
  formula = mcratingscale ~ sig_signal_matches_voter_vote_shown +
    sig_rating_supportive_vote_shown +
    factor(sameprob.fes) +
    pid * MC_republican,
  data = dat,
  subset = is.used.in.horserace.identification.strict
)
#summary(m_horserace_fe.strict)


# Table 4a: print the five models side by side, leaving out every coefficient whose
# name contains "factor" (the fixed effects).
apsrtable::apsrtable(
  m_agreement_fe,
  m_naive_fe,
  m_horserace_fe,
  m_horserace_fe.withusedinfinal,
  m_horserace_fe.strict,
  omitcoef = expression(grep("factor", coefnames)),
  stars = 'default'
)


# Stop diverting output to the log file
sink()